{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# This script obtaining frames from camera,using mtcnn detecting faces,croping and embedding faces with pre-trained facenet and finally face recogition with pre-trained classifier.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python2.7/dist-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n",
      "  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Populating the interactive namespace from numpy and matplotlib\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python2.7/dist-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import cv2\n",
    "import matplotlib.pyplot as plt\n",
    "%pylab inline\n",
    "import os\n",
    "from os.path import join as pjoin\n",
    "import sys\n",
    "import copy\n",
    "import detect_face\n",
    "import nn4 as network\n",
    "import random\n",
    "\n",
    "\n",
    "import sklearn\n",
    "\n",
    "from sklearn.externals import joblib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#face detection parameters\n",
    "minsize = 20 # minimum size of face\n",
    "threshold = [ 0.6, 0.7, 0.7 ]  # three steps's threshold\n",
    "factor = 0.709 # scale factor\n",
    "\n",
    "#facenet embedding parameters\n",
    "\n",
    "model_dir='./model_check_point/model.ckpt-500000'#\"Directory containing the graph definition and checkpoint files.\")\n",
    "model_def= 'models.nn4'  # \"Points to a module containing the definition of the inference graph.\")\n",
    "image_size=96 #\"Image size (height, width) in pixels.\"\n",
    "pool_type='MAX' #\"The type of pooling to use for some of the inception layers {'MAX', 'L2'}.\n",
    "use_lrn=False #\"Enables Local Response Normalization after the first layers of the inception network.\"\n",
    "seed=42,# \"Random seed.\"\n",
    "batch_size= None # \"Number of images to process in a batch.\"\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "frame_interval=3 # frame intervals  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def to_rgb(img):\n",
    "  w, h = img.shape\n",
    "  ret = np.empty((w, h, 3), dtype=np.uint8)\n",
    "  ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img\n",
    "  return ret"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Creating networks and loading parameters\n"
     ]
    }
   ],
   "source": [
    "#restore mtcnn model\n",
    "\n",
    "print('Creating networks and loading parameters')\n",
    "gpu_memory_fraction=1.0\n",
    "with tf.Graph().as_default():\n",
    "    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)\n",
    "    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))\n",
    "    with sess.as_default():\n",
    "        pnet, rnet, onet = detect_face.create_mtcnn(sess, './model_check_point/')\n",
    "        \n",
    "      "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "建立facenet embedding模型\n",
      "name =  incept3a\n",
      "inputSize =  192\n",
      "kernelSize = {3,5}\n",
      "kernelStride = {1,1}\n",
      "outputSize = {128,32}\n",
      "reduceSize = {96,16,32,64}\n",
      "pooling = {MAX, 3, 3, 1, 1}\n",
      "outputSize =  256\n",
      "\n",
      "name =  incept3b\n",
      "inputSize =  256\n",
      "kernelSize = {3,5}\n",
      "kernelStride = {1,1}\n",
      "outputSize = {128,64}\n",
      "reduceSize = {96,32,64,64}\n",
      "pooling = {MAX, 3, 3, 1, 1}\n",
      "outputSize =  320\n",
      "\n",
      "name =  incept3c\n",
      "inputSize =  320\n",
      "kernelSize = {3,5}\n",
      "kernelStride = {2,2}\n",
      "outputSize = {256,64}\n",
      "reduceSize = {128,32,0,0}\n",
      "pooling = {MAX, 3, 3, 2, 2}\n",
      "outputSize =  640\n",
      "\n",
      "name =  incept4a\n",
      "inputSize =  640\n",
      "kernelSize = {3,5}\n",
      "kernelStride = {1,1}\n",
      "outputSize = {192,64}\n",
      "reduceSize = {96,32,128,256}\n",
      "pooling = {MAX, 3, 3, 1, 1}\n",
      "outputSize =  640\n",
      "\n",
      "name =  incept4b\n",
      "inputSize =  640\n",
      "kernelSize = {3,5}\n",
      "kernelStride = {1,1}\n",
      "outputSize = {224,64}\n",
      "reduceSize = {112,32,128,224}\n",
      "pooling = {MAX, 3, 3, 1, 1}\n",
      "outputSize =  640\n",
      "\n",
      "name =  incept4c\n",
      "inputSize =  640\n",
      "kernelSize = {3,5}\n",
      "kernelStride = {1,1}\n",
      "outputSize = {256,64}\n",
      "reduceSize = {128,32,128,192}\n",
      "pooling = {MAX, 3, 3, 1, 1}\n",
      "outputSize =  640\n",
      "\n",
      "name =  incept4d\n",
      "inputSize =  640\n",
      "kernelSize = {3,5}\n",
      "kernelStride = {1,1}\n",
      "outputSize = {288,64}\n",
      "reduceSize = {144,32,128,160}\n",
      "pooling = {MAX, 3, 3, 1, 1}\n",
      "outputSize =  640\n",
      "\n",
      "name =  incept4e\n",
      "inputSize =  640\n",
      "kernelSize = {3,5}\n",
      "kernelStride = {2,2}\n",
      "outputSize = {256,128}\n",
      "reduceSize = {160,64,0,0}\n",
      "pooling = {MAX, 3, 3, 2, 2}\n",
      "outputSize =  1024\n",
      "\n",
      "name =  incept5a\n",
      "inputSize =  1024\n",
      "kernelSize = {3,5}\n",
      "kernelStride = {1,1}\n",
      "outputSize = {384,0}\n",
      "reduceSize = {192,0,128,384}\n",
      "pooling = {MAX, 3, 3, 1, 1}\n",
      "outputSize =  896\n",
      "\n",
      "name =  incept5b\n",
      "inputSize =  896\n",
      "kernelSize = {3,5}\n",
      "kernelStride = {1,1}\n",
      "outputSize = {384,0}\n",
      "reduceSize = {192,0,128,384}\n",
      "pooling = {MAX, 3, 3, 1, 1}\n",
      "outputSize =  896\n",
      "\n",
      "facenet embedding模型建立完毕\n"
     ]
    }
   ],
   "source": [
    "#restore facenet model\n",
    "print('建立facenet embedding模型')\n",
    "tf.Graph().as_default()\n",
    "sess = tf.Session()\n",
    "images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, \n",
    "                                                       image_size, \n",
    "                                                       image_size, 3), name='input')\n",
    "\n",
    "phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')\n",
    "\n",
    "\n",
    "\n",
    "embeddings = network.inference(images_placeholder, pool_type, \n",
    "                               use_lrn, \n",
    "                               1.0, \n",
    "                               phase_train=phase_train_placeholder)\n",
    "\n",
    "\n",
    "\n",
    "ema = tf.train.ExponentialMovingAverage(1.0)\n",
    "saver = tf.train.Saver(ema.variables_to_restore())\n",
    "#ckpt = tf.train.get_checkpoint_state(os.path.expanduser(model_dir))\n",
    "#saver.restore(sess, ckpt.model_checkpoint_path)\n",
    "\n",
    "model_checkpoint_path='./model_check_point/model-20160506.ckpt-500000'\n",
    "#ckpt = tf.train.get_checkpoint_state(os.path.expanduser(model_dir))\n",
    "#model_checkpoint_path='model-20160506.ckpt-500000'\n",
    "\n",
    "\n",
    "#saver.restore(sess, ckpt.model_checkpoint_path)\n",
    "saver.restore(sess, model_checkpoint_path)\n",
    "print('facenet embedding模型建立完毕')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#restore pre-trained knn classifier\n",
    "model = joblib.load('./model_check_point/knn_classifier.model')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# real time face detection and recognition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#obtaining frames from camera--->converting to gray--->converting to rgb\n",
    "#--->detecting faces---->croping faces--->embedding--->classifying--->print\n",
    "\n",
    "\n",
    "video_capture = cv2.VideoCapture(0)\n",
    "c=0\n",
    " \n",
    "while True:\n",
    "    # Capture frame-by-frame\n",
    "\n",
    "    ret, frame = video_capture.read()\n",
    "    #gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n",
    "    #print(frame.shape)\n",
    "    \n",
    "    timeF = frame_interval\n",
    "    \n",
    "    \n",
    "    if(c%timeF == 0): #frame_interval==3, face detection every 3 frames\n",
    "        \n",
    "        find_results=[]\n",
    "        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)\n",
    "        \n",
    "        \n",
    "        if gray.ndim == 2:\n",
    "            img = to_rgb(gray)\n",
    "        \n",
    "            \n",
    "\n",
    "        bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)\n",
    "\n",
    "        \n",
    "        \n",
    "        nrof_faces = bounding_boxes.shape[0]#number of faces\n",
    "        #print('找到人脸数目为：{}'.format(nrof_faces))\n",
    "        \n",
    "\n",
    "        for face_position in bounding_boxes:\n",
    "            \n",
    "            face_position=face_position.astype(int)\n",
    "            \n",
    "            #print((int(face_position[0]), int( face_position[1])))\n",
    "            #word_position.append((int(face_position[0]), int( face_position[1])))\n",
    "           \n",
    "            cv2.rectangle(frame, (face_position[0], \n",
    "                            face_position[1]), \n",
    "                      (face_position[2], face_position[3]), \n",
    "                      (0, 255, 0), 2)\n",
    "            \n",
    "            crop=img[face_position[1]:face_position[3],face_position[0]:face_position[2],]\n",
    "    \n",
    "            crop = cv2.resize(crop, (96, 96), interpolation=cv2.INTER_CUBIC )\n",
    "        \n",
    "            data=crop.reshape(-1,96,96,3)\n",
    "        \n",
    "            emb_data = sess.run([embeddings], \n",
    "                                feed_dict={images_placeholder: np.array(data), \n",
    "                                           phase_train_placeholder: False })[0]\n",
    "            \n",
    "            predict = model.predict(emb_data) \n",
    "         \n",
    "       \n",
    "            if predict==1:\n",
    "                find_results.append('me')\n",
    "            elif predict==2:\n",
    "                find_results.append('others')\n",
    "\n",
    "        \n",
    "    \n",
    " \n",
    "        cv2.putText(frame,'detected:{}'.format(find_results), (50,100), \n",
    "                cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0 ,0), \n",
    "                thickness = 2, lineType = 2)\n",
    "  \n",
    "            \n",
    "    #print(faces)\n",
    "    c+=1\n",
    "    # Draw a rectangle around the faces\n",
    "    \n",
    "\n",
    "\n",
    "    # Display the resulting frame\n",
    "\n",
    "    cv2.imshow('Video', frame)\n",
    "\n",
    "    if cv2.waitKey(1) & 0xFF == ord('q'):\n",
    "        break\n",
    "    \n",
    "\n",
    "\n",
    "# When everything is done, release the capture\n",
    "\n",
    "video_capture.release()\n",
    "cv2.destroyAllWindows()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
